{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Musicians - Hard"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading spark-stubs, spark-hive\n",
      "Adding Hive conf dir /opt/hive/conf to classpath\n",
      "Creating SparkSession\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "SLF4J: No SLF4J providers were found.\n",
      "SLF4J: Defaulting to no-operation (NOP) logger implementation\n",
      "SLF4J: See https://www.slf4j.org/codes.html#noProviders for further details.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<a target=\"_blank\" href=\"http://jupyter:4040\">Spark UI</a>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$                                  \n",
       "\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.log4j.{Level, Logger}\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark._\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql._\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.types._\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.functions._\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.expressions.Window\n",
       "\n",
       "\u001b[39m\n",
       "\u001b[36mspark\u001b[39m: \u001b[32mSparkSession\u001b[39m = org.apache.spark.sql.SparkSession@6fe5632b"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import $ivy.`org.apache.spark::spark-sql:3.4.0`\n",
    "\n",
    "import org.apache.log4j.{Level, Logger}\n",
    "Logger.getLogger(\"org\").setLevel(Level.OFF)\n",
    "\n",
    "import org.apache.spark._\n",
    "import org.apache.spark.sql._\n",
    "import org.apache.spark.sql.types._\n",
    "import org.apache.spark.sql.functions._\n",
    "import org.apache.spark.sql.expressions.Window\n",
    "\n",
    "val spark = {\n",
    "    NotebookSparkSession.builder()\n",
    "    .progress(false)\n",
    "    .appName(\"app16-3\")\n",
    "    // .master(\"spark://192.168.31.31:7077\")\n",
    "    .master(\"local[*]\")\n",
    "    .config(\"spark.sql.warehouse.dir\", \n",
    "            \"hdfs://192.168.31.31:9000/user/hive/warehouse\") \n",
    "    .config(\"spark.cores.max\", \"4\") \n",
    "    .config(\"spark.executor.instances\", \"1\") \n",
    "    .config(\"spark.executor.cores\", \"2\") \n",
    "    .config(\"spark.executor.memory\", \"10g\") \n",
    "    .config(\"spark.shuffle.service.enabled\", \"false\") \n",
    "    .config(\"spark.dynamicAllocation.enabled\", \"false\") \n",
    "    .config(\"spark.sql.catalogImplementation\", \"hive\")\n",
    "    .config(\"spark.sql.repl.eagerEval.enabled\", \"true\")\n",
    "    .config(\"spark.driver.allowMultipleContexts\", \"true\")\n",
    "    .getOrCreate()\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[32mimport \u001b[39m\u001b[36mspark.implicits._\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36mspark.sqlContext.implicits._\n",
       "\u001b[39m\n",
       "defined \u001b[32mfunction\u001b[39m \u001b[36msc\u001b[39m\n",
       "\u001b[36mhiveCxt\u001b[39m: \u001b[32msql\u001b[39m.\u001b[32mhive\u001b[39m.\u001b[32mHiveContext\u001b[39m = org.apache.spark.sql.hive.HiveContext@57b5383f"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import spark.implicits._\n",
    "import spark.sqlContext.implicits._\n",
    "def sc = spark.sparkContext\n",
    "val hiveCxt = new org.apache.spark.sql.hive.HiveContext(sc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "defined \u001b[32mclass\u001b[39m \u001b[36mRichDF\u001b[39m"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "// Credit to Aivean\n",
    "implicit class RichDF(val ds:DataFrame) {\n",
    "    def showHTML(limit: Int = 50, truncate: Int = 100) = {\n",
    "        import xml.Utility.escape\n",
    "        val data = ds.take(limit)\n",
    "        val header = ds.schema.fieldNames.toSeq        \n",
    "        val rows: Seq[Seq[String]] = data.map { row =>\n",
    "          row.toSeq.map {cell =>\n",
    "            val str = cell match {\n",
    "              case null => \"null\"\n",
    "              case binary: Array[Byte] => binary.map(\"%02X\".format(_)).mkString(\"[\", \" \", \"]\")\n",
    "              case array: Array[_] => array.mkString(\"[\", \", \", \"]\")\n",
    "              case seq: Seq[_] => seq.mkString(\"[\", \", \", \"]\")\n",
    "              case _ => cell.toString\n",
    "            }\n",
    "            if (truncate > 0 && str.length > truncate) {\n",
    "              // do not show ellipses for strings shorter than 4 characters.\n",
    "              if (truncate < 4) str.substring(0, truncate)\n",
    "              else str.substring(0, truncate - 3) + \"...\"\n",
    "            } else {\n",
    "              str\n",
    "            }\n",
    "          }: Seq[String]\n",
    "        }\n",
    "    publish.html(s\"\"\" <table>\n",
    "                <tr>\n",
    "                 ${header.map(h => s\"<th>${escape(h)}</th>\").mkString}\n",
    "                </tr>\n",
    "                ${rows.map {row =>\n",
    "                  s\"<tr>${row.map{c => s\"<td>${escape(c)}</td>\" }.mkString}</tr>\"\n",
    "                }.mkString}\n",
    "            </table>\n",
    "        \"\"\")\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[36mband\u001b[39m: \u001b[32mDataFrame\u001b[39m = [band_no: int, band_name: string ... 4 more fields]\n",
       "\u001b[36mcomposer\u001b[39m: \u001b[32mDataFrame\u001b[39m = [comp_no: int, comp_is: int ... 1 more field]\n",
       "\u001b[36mcomposition\u001b[39m: \u001b[32mDataFrame\u001b[39m = [c_no: int, comp_date: string ... 2 more fields]\n",
       "\u001b[36mconcert\u001b[39m: \u001b[32mDataFrame\u001b[39m = [concert_no: int, concert_venue: string ... 3 more fields]\n",
       "\u001b[36mhas_composed\u001b[39m: \u001b[32mDataFrame\u001b[39m = [cmpr_no: int, cmpn_no: int]\n",
       "\u001b[36mmusician\u001b[39m: \u001b[32mDataFrame\u001b[39m = [m_no: int, m_name: string ... 4 more fields]\n",
       "\u001b[36mperformance\u001b[39m: \u001b[32mDataFrame\u001b[39m = [pfrmnc_no: int, gave: int ... 3 more fields]\n",
       "\u001b[36mperformer\u001b[39m: \u001b[32mDataFrame\u001b[39m = [perf_no: int, perf_is: int ... 2 more fields]\n",
       "\u001b[36mplace\u001b[39m: \u001b[32mDataFrame\u001b[39m = [place_no: int, place_town: string ... 1 more field]\n",
       "\u001b[36mplays_in\u001b[39m: \u001b[32mDataFrame\u001b[39m = [player: int, band_id: int]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "val band = hiveCxt.table(\"sqlzoo.band\")\n",
    "val composer = hiveCxt.table(\"sqlzoo.composer\")\n",
    "val composition = hiveCxt.table(\"sqlzoo.composition\")\n",
    "val concert = hiveCxt.table(\"sqlzoo.concert\")\n",
    "val has_composed = hiveCxt.table(\"sqlzoo.has_composed\")\n",
    "val musician = hiveCxt.table(\"sqlzoo.musician\")\n",
    "val performance = hiveCxt.table(\"sqlzoo.performance\")\n",
    "val performer = hiveCxt.table(\"sqlzoo.performer\")\n",
    "val place = hiveCxt.table(\"sqlzoo.place\")\n",
    "val plays_in = hiveCxt.table(\"sqlzoo.plays_in\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 11.\n",
    "**List the name and town of birth of any performer born in the same city as James First.**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>m_name</th><th>place_town</th>\n",
       "                </tr>\n",
       "                <tr><td>Alan Fluff</td><td>London</td></tr><tr><td>Andy Jones</td><td>London</td></tr><tr><td>Theo Mengel</td><td>London</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(musician.filter(musician(\"m_name\") !== \"James First\")\n",
    " .join(musician.filter(musician(\"m_name\") === \"James First\")\n",
    "       .join(place, (musician(\"born_in\") === place(\"place_no\")))\n",
    "       .select(\"born_in\", \"place_town\"),\n",
    "       \"born_in\")\n",
    " .select(\"m_name\", \"place_town\")\n",
    " .orderBy(\"m_name\")\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 12.\n",
    "**Create a list showing for EVERY musician born in Britain the number of compositions and the number of instruments played.**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>m_no</th><th>m_name</th><th>n_composition</th><th>n_instrument</th>\n",
       "                </tr>\n",
       "                <tr><td>14</td><td>Alan Fluff</td><td>0</td><td>2</td></tr><tr><td>19</td><td>Andy Jones</td><td>1</td><td>0</td></tr><tr><td>12</td><td>Davis Heavan</td><td>0</td><td>3</td></tr><tr><td>18</td><td>Elsie James</td><td>0</td><td>3</td></tr><tr><td>1</td><td>Fred Bloggs</td><td>2</td><td>0</td></tr><tr><td>17</td><td>Freda Miles</td><td>2</td><td>0</td></tr><tr><td>4</td><td>Harriet Smithson</td><td>0</td><td>2</td></tr><tr><td>8</td><td>Harry Forte</td><td>2</td><td>3</td></tr><tr><td>3</td><td>Helen Smyth</td><td>1</td><td>1</td></tr><tr><td>5</td><td>James First</td><td>4</td><td>1</td></tr><tr><td>16</td><td>James Quick</td><td>0</td><td>2</td></tr><tr><td>21</td><td>James Steeple</td><td>0</td><td>0</td></tr><tr><td>10</td><td>Jeff Dawn</td><td>1</td><td>2</td></tr><tr><td>2</td><td>John Smith</td><td>0</td><td>3</td></tr><tr><td>20</td><td>Louise Simpson</td><td>0</td><td>3</td></tr><tr><td>13</td><td>Lovely Time</td><td>2</td><td>0</td></tr><tr><td>9</td><td>Phil Hot</td><td>3</td><td>0</td></tr><tr><td>11</td><td>Rose Spring</td><td>2</td><td>0</td></tr><tr><td>22</td><td>Steven Chaytors</td><td>0</td><td>0</td></tr><tr><td>7</td><td>Sue Little</td><td>2</td><td>1</td></tr><tr><td>6</td><td>Theo Mengel</td><td>0</td><td>3</td></tr><tr><td>15</td><td>Tony Smythe</td><td>1</td><td>0</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "\u001b[36mt\u001b[39m: \u001b[32mDataFrame\u001b[39m = [m_no: int, m_name: string ... 1 more field]\n",
       "\u001b[36mp\u001b[39m: \u001b[32mDataFrame\u001b[39m = [m_no: int, n_instrument: bigint]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "val t = (musician\n",
    "     .join(place.filter(place(\"place_country\").isin(\"England\", \"Scotland\")),\n",
    "           (musician(\"born_in\") === place(\"place_no\")), joinType=\"left\")\n",
    "     .join(composer, (musician(\"m_no\") === composer(\"comp_is\")), \n",
    "           joinType=\"left\")\n",
    "     .join(has_composed, (composer(\"comp_no\") === has_composed(\"cmpr_no\")), \n",
    "           joinType=\"left\")\n",
    "     .join(composition, (has_composed(\"cmpn_no\") === composition(\"c_no\")), \n",
    "           joinType=\"left\")\n",
    "     .groupBy(\"m_no\", \"m_name\")\n",
    "     .agg(count(\"c_no\").alias(\"n_composition\")))\n",
    "val p = (musician.join(performer, (musician(\"m_no\") === performer(\"perf_is\")),\n",
    "                       joinType=\"left\")\n",
    "     .select(\"m_no\", \"instrument\")\n",
    "     .distinct()\n",
    "     .groupBy(\"m_no\")\n",
    "     .agg(count(\"instrument\").alias(\"n_instrument\")))\n",
    "t.join(p, \"m_no\").orderBy(\"m_name\").showHTML()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 13.\n",
    "**Give the band name, conductor and contact of the bands performing at the most recent concert in the Royal Albert Hall.**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>band_name</th><th>contact</th><th>conductor</th>\n",
       "                </tr>\n",
       "                <tr><td>Somebody Loves this</td><td>Theo Mengel</td><td>Alan Fluff</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "\u001b[36mrah\u001b[39m: \u001b[32mDataset\u001b[39m[\u001b[32mRow\u001b[39m] = [concert_no: int, concert_venue: string ... 3 more fields]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "val rah = (concert.filter(concert(\"concert_venue\") === \"Royal Albert Hall\")\n",
    "           .orderBy(col(\"con_date\").desc)\n",
    "           .limit(1))\n",
    "(band\n",
    " .join(musician.withColumnRenamed(\"m_name\", \"contact\")\n",
    "       .alias(\"mus1\"), \n",
    "       (band(\"band_contact\") === col(\"mus1.m_no\")))\n",
    " .join(performance, (band(\"band_no\") === performance(\"gave\")))\n",
    " .join(musician.withColumnRenamed(\"m_name\", \"conductor\")\n",
    "       .alias(\"mus2\"),\n",
    "       (performance(\"conducted_by\") === col(\"mus2.m_no\")))\n",
    " .join(rah, (performance(\"performed_in\") === rah(\"concert_no\")))\n",
    " .select(\"band_name\", \"contact\", \"conductor\")\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 14.\n",
    "**Give a list of musicians associated with Glasgow. Include the name of the musician and the nature of the association - one or more of \"LIVES_IN\", \"BORN_IN\", \"PERFORMED_IN\" AND \"IN_BAND_IN\".**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>m_name</th><th>assoc</th>\n",
       "                </tr>\n",
       "                <tr><td>Louise Simpson</td><td>BORN_IN</td></tr><tr><td>Steven Chaytors</td><td>BORN_IN</td></tr><tr><td>Lovely Time</td><td>BORN_IN</td></tr><tr><td>Jeff Dawn</td><td>LIVES_IN</td></tr><tr><td>Harriet Smithson</td><td>LIVES_IN</td></tr><tr><td>James Steeple</td><td>LIVES_IN</td></tr><tr><td>Andy Jones</td><td>LIVES_IN</td></tr><tr><td>Louise Simpson</td><td>LIVES_IN</td></tr><tr><td>Lovely Time</td><td>IN_BAND_IN</td></tr><tr><td>Jeff Dawn</td><td>IN_BAND_IN</td></tr><tr><td>Freda Miles</td><td>IN_BAND_IN</td></tr><tr><td>Elsie James</td><td>IN_BAND_IN</td></tr><tr><td>Tony Smythe</td><td>IN_BAND_IN</td></tr><tr><td>Alan Fluff</td><td>IN_BAND_IN</td></tr><tr><td>Davis Heavan</td><td>IN_BAND_IN</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "\u001b[36mt1\u001b[39m: \u001b[32mDataFrame\u001b[39m = [m_name: string, place_town: string ... 1 more field]\n",
       "\u001b[36mt2\u001b[39m: \u001b[32mDataFrame\u001b[39m = [m_name: string, place_town: string ... 1 more field]\n",
       "\u001b[36mt3\u001b[39m: \u001b[32mDataFrame\u001b[39m = [m_name: string, place_town: string ... 1 more field]\n",
       "\u001b[36mt4\u001b[39m: \u001b[32mDataFrame\u001b[39m = [m_name: string, place_town: string ... 1 more field]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "val t1 = (musician.join(place, (musician(\"born_in\") === place(\"place_no\")))\n",
    "      .select(\"m_name\", \"place_town\")\n",
    "      .withColumn(\"assoc\", lit(\"BORN_IN\")))\n",
    "val t2 = (musician.join(place, (musician(\"living_in\") === place(\"place_no\")))\n",
    "      .select(\"m_name\", \"place_town\")\n",
    "      .withColumn(\"assoc\", lit(\"LIVES_IN\")))\n",
    "val t3 = (musician.join(performer, (musician(\"m_no\") === \"perf_is\"))\n",
    "      .join(plays_in, (performer(\"perf_no\") === plays_in(\"player\")))\n",
    "      .join(performance, (plays_in(\"band_id\") === performance(\"gave\")))\n",
    "      .join(concert, (performance(\"performed_in\") === concert(\"concert_no\")))\n",
    "      .join(place, (concert(\"concert_in\") === place(\"place_no\")))\n",
    "      .select(\"m_name\", \"place_town\")\n",
    "      .withColumn(\"assoc\", lit(\"PERFORMED_IN\")))\n",
    "val t4 = (musician.join(plays_in, (musician(\"m_no\") === plays_in(\"player\")))\n",
    "      .join(band, (plays_in(\"band_id\") === band(\"band_no\")))\n",
    "      .join(place, (band(\"band_home\") === place(\"place_no\")))\n",
    "      .select(\"m_name\", \"place_town\")\n",
    "      .withColumn(\"assoc\", lit(\"IN_BAND_IN\")))\n",
    "(t1.union(t2).union(t3).union(t4)\n",
    " .filter(col(\"place_town\") === \"Glasgow\")\n",
    " .select(\"m_name\", \"assoc\")\n",
    " .distinct()\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 15.\n",
    "**Jeff Dawn plays in a band with someone who plays in a band with Sue Little. Who is it and what are the bands?**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>m_name</th><th>Sue&quot;s band</th><th>Jeff&quot;s band</th>\n",
       "                </tr>\n",
       "                <tr><td>John Smith</td><td>BBSO</td><td>AASO</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "\u001b[36msue_band\u001b[39m: \u001b[32mDataFrame\u001b[39m = [band_id: int, band_name: string]\n",
       "\u001b[36msue_coplayers\u001b[39m: \u001b[32mDataFrame\u001b[39m = [band_id: int, m_no: int ... 11 more fields]\n",
       "\u001b[36mjeff_band\u001b[39m: \u001b[32mDataFrame\u001b[39m = [band_id: int, band_name: string]\n",
       "\u001b[36mjeff_coplayers\u001b[39m: \u001b[32mDataFrame\u001b[39m = [band_id: int, m_no: int ... 11 more fields]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "val sue_band = (musician.filter(musician(\"m_name\") === \"Sue Little\")\n",
    "            .join(performer, (musician(\"m_no\") === performer(\"perf_is\")))\n",
    "            .join(plays_in, (performer(\"perf_no\") === plays_in(\"player\")))\n",
    "            .join(band, (plays_in(\"band_id\") === band(\"band_no\")))\n",
    "            .select(\"band_id\", \"band_name\"))\n",
    "val sue_coplayers = (musician\n",
    "                 .join(performer, (musician(\"m_no\") === performer(\"perf_is\")))\n",
    "                 .join(plays_in, (performer(\"perf_no\") === plays_in(\"player\")))\n",
    "                 .join(sue_band, \"band_id\"))\n",
    "val jeff_band = (musician.filter(musician(\"m_name\") === \"Jeff Dawn\")\n",
    "             .join(performer, (musician(\"m_no\") === performer(\"perf_is\")))\n",
    "             .join(plays_in, (performer(\"perf_no\") === plays_in(\"player\")))\n",
    "             .join(band, (plays_in(\"band_id\") === band(\"band_no\")))\n",
    "             .select(\"band_id\", \"band_name\"))\n",
    "val jeff_coplayers = (musician\n",
    "                 .join(performer, (musician(\"m_no\") === performer(\"perf_is\")))\n",
    "                 .join(plays_in, (performer(\"perf_no\") === plays_in(\"player\")))\n",
    "                 .join(jeff_band, \"band_id\"))\n",
    "(sue_coplayers.select(\n",
    "    col(\"m_no\"), col(\"band_name\").alias(\"Sue\\\"s band\"), col(\"m_name\"))\n",
    " .join(jeff_coplayers\n",
    "       .select(col(\"m_no\"), col(\"band_name\").alias(\"Jeff\\\"s band\")), \"m_no\")\n",
    " .select(\"m_name\", \"Sue\\\"s band\", \"Jeff\\\"s band\")\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "spark.stop()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Scala",
   "language": "scala",
   "name": "scala"
  },
  "language_info": {
   "codemirror_mode": "text/x-scala",
   "file_extension": ".sc",
   "mimetype": "text/x-scala",
   "name": "scala",
   "nbconvert_exporter": "script",
   "version": "2.12.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
